<!DOCTYPE html>
<html lang="zh-CN">
    <!-- title -->


    

<!-- keywords -->



<head>
    <meta charset="utf-8">
    <!-- Zoom is left enabled (no user-scalable=no) so the page can be enlarged on touch devices. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="author" content="Binisalegend">
    <meta name="renderer" content="webkit">
    <meta name="copyright" content="Binisalegend">
    <meta name="keywords" content="hexo,hexo-theme,hexo-blog">
    <!-- Single description for the page; the empty duplicate description meta was dropped. -->
    <meta name="description" content="这个是自己学着玩的（bushi">
    <!-- Open Graph / article metadata used for link previews -->
    <meta property="og:type" content="article">
    <meta property="og:title" content="强化学习笔记">
    <meta property="og:url" content="https://binisalegend.github.io/2023/11/08/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/index.html">
    <meta property="og:site_name" content="Binisalegend的博客">
    <meta property="og:description" content="这个是自己学着玩的（bushi">
    <meta property="og:locale" content="zh_CN">
    <meta property="og:image" content="https://binisalegend.github.io/2023/11/08/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/Pasted%20image%2020231008121208.png">
    <meta property="article:published_time" content="2023-11-08T05:18:58.000Z">
    <meta property="article:modified_time" content="2023-11-08T06:25:31.643Z">
    <meta property="article:author" content="Binisalegend">
    <meta property="article:tag" content="强化学习">
    <!-- Twitter card metadata -->
    <meta name="twitter:card" content="summary">
    <meta name="twitter:image" content="https://binisalegend.github.io/2023/11/08/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/Pasted%20image%2020231008121208.png">
    <meta http-equiv="Cache-control" content="no-cache">
    <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    <link rel="icon" href="/assets/favicon.ico">
    <title>强化学习笔记 · Binisalegend&#39;s Studio</title>
    <!-- /*! loadCSS. [c]2017 Filament Group, Inc. MIT License */
/* This file is meant as a standalone workflow for
- testing support for link[rel=preload]
- enabling async CSS loading in browsers that do not support rel=preload
- applying rel preload css once loaded, whether supported or not.
*/ -->
<script>
    // Polyfill for <link rel="preload" as="style">: when the browser reports no
    // support for rel=preload, every such link in the document is converted into a
    // stylesheet that is fetched without blocking rendering.
    // `w` is the object passed in at the bottom of this function: `global` when it
    // exists, otherwise `this`.
    (function (w) {
        'use strict'
        // make sure a loadCSS object exists on w so the polyfill can hang off it
        if (!w.loadCSS) {
            w.loadCSS = function () {}
        }
        // define on the loadCSS obj
        // NOTE(review): `loadCSS` is read here as a bare identifier, so this relies on
        // `w` being the global object — confirm if this script is ever loaded differently.
        var rp = (loadCSS.relpreload = {})
        // rel=preload feature support test
        // runs once and returns a function for compat purposes
        rp.support = (function () {
            var ret
            try {
                ret = w.document.createElement('link').relList.supports('preload')
            } catch (e) {
                // relList / supports() unavailable: treat preload as unsupported
                ret = false
            }
            return function () {
                return ret
            }
        })()

        // if preload isn't supported, get an asynchronous load by using a non-matching media attribute
        // then change that media back to its intended value on load
        rp.bindMediaToggle = function (link) {
            // remember existing media attr for ultimate state, or default to 'all'
            var finalMedia = link.media || 'all'

            // restore the media value captured above
            function enableStylesheet() {
                link.media = finalMedia
            }

            // bind load handlers to enable media
            if (link.addEventListener) {
                link.addEventListener('load', enableStylesheet)
            } else if (link.attachEvent) {
                link.attachEvent('onload', enableStylesheet)
            }

            // Set rel and non-applicable media type to start an async request
            // note: timeout allows this to happen async to let rendering continue in IE
            setTimeout(function () {
                link.rel = 'stylesheet'
                link.media = 'only x'
            })
            // also enable media after 3 seconds,
            // which will catch very old browsers (android 2.x, old firefox) that don't support onload on link
            setTimeout(enableStylesheet, 3000)
        }

        // loop through link elements in DOM
        rp.poly = function () {
            // double check this to prevent external calls from running
            if (rp.support()) {
                return
            }
            var links = w.document.getElementsByTagName('link')
            for (var i = 0; i < links.length; i++) {
                var link = links[i]
                // qualify links to those with rel=preload and as=style attrs
                if (
                    link.rel === 'preload' &&
                    link.getAttribute('as') === 'style' &&
                    !link.getAttribute('data-loadcss')
                ) {
                    // prevent rerunning on link
                    link.setAttribute('data-loadcss', true)
                    // bind listeners to toggle media back
                    rp.bindMediaToggle(link)
                }
            }
        }

        // if unsupported, run the polyfill
        if (!rp.support()) {
            // run once at least
            rp.poly()

            // rerun poly on an interval until onload
            // (each pass only touches links not yet marked with data-loadcss)
            var run = w.setInterval(rp.poly, 500)
            if (w.addEventListener) {
                w.addEventListener('load', function () {
                    // final pass, then stop polling
                    rp.poly()
                    w.clearInterval(run)
                })
            } else if (w.attachEvent) {
                w.attachEvent('onload', function () {
                    // final pass, then stop polling
                    rp.poly()
                    w.clearInterval(run)
                })
            }
        }

        // commonjs
        // export through `exports` when it is defined, otherwise publish on w
        if (typeof exports !== 'undefined') {
            exports.loadCSS = loadCSS
        } else {
            w.loadCSS = loadCSS
        }
    })(typeof global !== 'undefined' ? global : this)
</script>

    <style type="text/css">
    /* Inline rules that apply before any external stylesheet has loaded. */

    /* Font served from this site's /font/ directory. */
    @font-face {
        font-family: 'Oswald-Regular';
        src: url("/font/Oswald-Regular.ttf");
    }

    body {
        margin: 0;
    }

    /* These parts start out hidden.
       NOTE(review): presumably the external style.css makes them visible again once it loads; confirm. */
    header,
    footer,
    .back-top,
    .sidebar,
    .container,
    .site-intro-meta,
    .toc-wrapper {
        display: none;
    }

    /* Banner box; its height is not set here (see the commented-out value below). */
    .site-intro {
        position: relative;
        z-index: 3;
        width: 100%;
        /* height: 50vh; */
        overflow: hidden;
    }

    /* Animated diagonal-stripe layer positioned behind the other banner content (z-index: -2). */
    .site-intro-placeholder {
        position: absolute;
        z-index: -2;
        top: 0;
        left: 0;
        /* wider than the banner so the horizontal shift below never exposes an edge */
        width: calc(100% + 300px);
        height: 100%;
        /* stripes repeat every 160px along a 45-degree axis */
        background: repeating-linear-gradient(-45deg, #444 0, #444 80px, #333 80px, #333 160px);
        background-position: center center;
        /* 226px is roughly 160px / sin(45deg), i.e. one stripe period measured horizontally */
        transform: translate3d(-226px, 0, 0);
        animation: gradient-move 2.5s ease-out 0s infinite;
    }

    /* Slides the stripe layer right by one horizontal period, then restarts. */
    @keyframes gradient-move {
        0% {
            transform: translate3d(-226px, 0, 0);
        }
        100% {
            transform: translate3d(0, 0, 0);
        }
    }
</style>

    <!-- style.css and the fancybox CSS are preloaded and switched to rel="stylesheet" by their onload handlers. -->
    <!-- NOTE(review): dark.css is preloaded as "/css/dark.css?v=20211217" but the stylesheet link below requests "/css/dark.css" without the query string; the URLs differ, so the preload is probably not reused. Confirm and align the two hrefs. -->
    <link rel="preload" href="/css/style.css?v=20211217" as="style" onload="this.onload=null;this.rel='stylesheet'">
    <link rel="preload" href="/css/dark.css?v=20211217" as="style">
    <link rel="stylesheet" href="/css/dark.css">
    <link rel="stylesheet" href="/css/mobile.css?v=20211217" media="(max-width: 960px)">
    <link rel="preload" href="https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@3.5.7/dist/jquery.fancybox.min.css" as="style" onload="this.onload=null;this.rel='stylesheet'">
    <!-- script and font preloads; the font preloads carry crossorigin -->
    <link rel="preload" href="https://cdn.jsdelivr.net/npm/jquery@3.6.0/dist/jquery.min.js" as="script">
    <link rel="preload" href="/scripts/main.js?v=20211217" as="script">
    <link rel="preload" href="/scripts/dark.js?v=20211217" as="script">
    <link rel="preload" href="/font/Oswald-Regular.ttf" as="font" crossorigin>
    <link rel="preload" href="https://at.alicdn.com/t/font_327081_1dta1rlogw17zaor.woff" as="font" crossorigin>
    <!-- algolia -->
    
    <!-- 百度统计  -->
    
    <!-- 谷歌统计  -->
    
<!-- hexo injector head_end start -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/katex.min.css">

<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/hexo-math@4.0.0/dist/style.css">
<!-- hexo injector head_end end --><meta name="generator" content="Hexo 6.3.0"></head>

    <script src="https://cdn.jsdelivr.net/npm/jquery@3.6.0/dist/jquery.min.js"></script>
    <script>
        // Fallback: when the CDN copy of jQuery above did not define window.$,
        // load the copy bundled with the site instead.
        // `typeof` yields a string, so it has to be compared with the string
        // 'undefined'; comparing it with the value undefined is never true.
        if (typeof window.$ === 'undefined') {
            console.warn('jquery load from jsdelivr failed, will load local script')
            // A script element cannot be self-closed: without an explicit end tag the
            // parser would treat the markup that follows as script text. The end tag
            // is written with an escaped slash so it does not terminate this inline script.
            document.write('<script src="/lib/jquery.min.js"><\/script>')
        }
    </script>
    
        <body class="post-body">
    
        <!-- header -->
        <header class="header header-mobile">
            <!-- top read progress line -->
            <div class="header-element">
                <div class="read-progress"></div>
            </div>
            <!-- sidebar menu button -->
            <div class="header-element">
                <div class="header-sidebar-menu">
                    <div style="padding-left: 1px;">&#xe775;</div>
                </div>
            </div>
            <!-- header actions -->
            <div class="header-actions">
                <!-- theme mode switch button -->
                <span class="header-theme-btn header-element">
                    <i class="fas fa-adjust"></i>
                </span>
                <!-- back to home page text -->
                <span class="home-link header-element">
                    <a href="/">锅中冰's Studio.</a>
                </span>
            </div>
            <!-- banner for the post layout: blog title and post title -->
            <div class="banner">
                <div class="blog-title header-element">
                    <a href="/">锅中冰&#39;s Studio.</a>
                </div>
                <div class="post-title header-element">
                    <a href="#" class="post-name">强化学习笔记</a>
                </div>
            </div>
        </header>

        <!-- fixed footer -->
        <footer class="footer-fixed">
            <!-- back-to-top control; it carries the back-top-hidden class in the initial markup -->
            <div class="footer-fixed-element">
                <div class="back-top back-top-hidden">
                    <div>&#xe639;</div>
                </div>
            </div>
        </footer>

        <!-- wrapper -->
        <div class="wrapper">
            <div class="site-intro" style="height:50vh;">
                <!-- template slots for the home page and 404 page are empty on this post page -->
                <div class="site-intro-placeholder"></div>
                <div class="site-intro-img" style="background-image: url(https://source.unsplash.com/2560x800/?programming)"></div>
                <div class="site-intro-meta">
                    <!-- title -->
                    <h1 class="intro-title">
                        强化学习笔记
                    </h1>
                    <!-- subtitle (empty for this post) -->
                    <p class="intro-subtitle">
                    </p>
                    <!-- post page meta -->
                    <div class="post-intros">
                        <!-- post tags -->
                        <div class="post-intro-tags">
                            <a class="post-tag" href="javascript:void(0);" data-tags="强化学习">强化学习</a>
                        </div>
                        <!-- word count and reading time -->
                        <div class="post-intro-read">
                            <span>字数统计: <span class="post-count word-count">592</span>阅读时长: <span class="post-count reading-time">2 min</span></span>
                        </div>
                        <div class="post-intro-meta">
                            <!-- publish date -->
                            <span class="iconfont-archer post-intro-calander">&#xe676;</span>
                            <span class="post-intro-time">2023/11/08</span>
                            <!-- busuanzi page-view counter -->
                            <span id="busuanzi_container_page_pv" class="busuanzi-pv">
                                <span class="iconfont-archer post-intro-busuanzi">&#xe602;</span>
                                <span id="busuanzi_value_page_pv"></span>
                            </span>
                            <!-- share menu -->
                            <span class="share-wrapper">
                                <span class="iconfont-archer share-icon">&#xe71d;</span>
                                <span class="share-text">Share</span>
                                <ul class="share-list">
                                    <li class="iconfont-archer share-qr" data-type="qr">&#xe75b;
                                        <div class="share-qrcode"></div>
                                    </li>
                                    <li class="iconfont-archer" data-type="weibo">&#xe619;</li>
                                    <li class="iconfont-archer" data-type="qzone">&#xe62e;</li>
                                    <li class="iconfont-archer" data-type="twitter">&#xe634;</li>
                                    <li class="iconfont-archer" data-type="facebook">&#xe67a;</li>
                                </ul>
                            </span>
                        </div>
                    </div>
                </div>
            </div>

            <script>
  /**
   * Derive a set of browser/platform flags from the user-agent string.
   *
   * Every flag is a plain substring or regex test on
   * window.navigator.userAgent, so several of them are deliberately broad
   * (see the per-flag notes).
   *
   * @returns {Object} the raw `userAgent` string plus one boolean per flag
   */
  function getBrowserVersions() {
    var u = window.navigator.userAgent
    return {
      userAgent: u,
      trident: u.indexOf('Trident') > -1, // IE engine
      presto: u.indexOf('Presto') > -1, // Opera engine
      webKit: u.indexOf('AppleWebKit') > -1, // Apple / Google engine
      gecko: u.indexOf('Gecko') > -1 && u.indexOf('KHTML') == -1, // Firefox engine
      mobile: !!u.match(/AppleWebKit.*Mobile.*/), // mobile device
      ios: !!u.match(/\(i[^;]+;( U;)? CPU.+Mac OS X/), // iOS device
      android: u.indexOf('Android') > -1 || u.indexOf('Linux') > -1, // Android device or UC browser (any UA containing "Linux" matches)
      iPhone: u.indexOf('iPhone') > -1 || u.indexOf('Mac') > -1, // iPhone or Android QQ browser (any UA containing "Mac" matches)
      iPad: u.indexOf('iPad') > -1, // iPad
      webApp: u.indexOf('Safari') == -1, // web app without browser chrome: true when "Safari" is absent
      // WeChat in-app browser: true when the UA contains "MicroMessenger".
      // The previous `== -1` test was inverted and reported true everywhere except WeChat.
      weixin: u.indexOf('MicroMessenger') > -1,
      uc: u.indexOf('UCBrowser') > -1, // UC browser on Android
    }
  }
  // Flags computed once, when this script runs, and kept on `browser.versions`.
  var browser = {
    versions: getBrowserVersions(),
  }
  // Log the raw user-agent string to the console.
  console.log('userAgent: ' + browser.versions.userAgent)

  /**
   * Callback run once the web font is ready (or loading was given up on):
   * adds the fade-in classes to the banner title, subtitle and post meta.
   *
   * Does nothing beyond logging when the page has no `.site-intro-meta`
   * element, and skips any individual target that is missing.
   */
  function fontLoaded() {
    console.log('font loaded')
    // getElementsByClassName always returns a collection object (truthy even
    // when empty), so presence has to be tested through its length.
    if (!document.getElementsByClassName('site-intro-meta').length) {
      return
    }
    // [class of the element to reveal, class that triggers its fade-in]
    var fadeTargets = [
      ['intro-title', 'intro-fade-in'],
      ['intro-subtitle', 'intro-fade-in'],
      ['post-intros', 'post-fade-in'],
    ]
    for (var i = 0; i < fadeTargets.length; i++) {
      var el = document.getElementsByClassName(fadeTargets[i][0])[0]
      // guard each lookup so one missing element does not abort the others
      if (el) {
        el.classList.add(fadeTargets[i][1])
      }
    }
  }

  /**
   * Success callback for the webfont loader script.
   *
   * UC Browser is special-cased (the original note says it does not support
   * cross-origin loading): the text is revealed straight away. Every other
   * browser asks WebFont to load the 'Oswald-Regular' family and reveals the
   * text when loading finishes or fails.
   */
  function asyncCb() {
    var isUcBrowser = browser.versions.uc
    if (!isUcBrowser) {
      var fontConfig = {
        custom: {
          families: ['Oswald-Regular'],
        },
        loading: function () {
          // all fonts have started loading; nothing to do
        },
        active: function () {
          // all fonts rendered
          fontLoaded()
        },
        inactive: function () {
          // font loading failed: invalid font or unsupported by the browser
          console.log('inactive: timeout')
          fontLoaded()
        },
        timeout: 5000, // 5000 ms
      }
      WebFont.load(fontConfig)
      return
    }
    console.log('UCBrowser')
    fontLoaded()
  }

  /**
   * Failure callback for a script load attempt: emits a console warning.
   */
  function asyncErr() {
    var message = 'script load from CDN failed, will load local script'
    console.warn(message)
  }

  /**
   * Load a script by inserting a new <script> element before the first
   * script element in the document.
   *
   * @param {string} u     URL assigned to the new element's `src`
   * @param {Function} [cb]  called as cb(null, event) on the element's 'load' event
   * @param {Function} [err] called as err(null, event) on the element's 'error' event
   */
  function async(u, cb, err) {
    var scriptEl = document.createElement('script')
    var firstScript = document.getElementsByTagName('script')[0]

    scriptEl.src = u

    if (cb) {
      var onLoad = function (evt) {
        cb(null, evt)
      }
      scriptEl.addEventListener('load', onLoad, false)
    }

    if (err) {
      var onError = function (evt) {
        err(null, evt)
      }
      scriptEl.addEventListener('error', onError, false)
    }

    firstScript.parentNode.insertBefore(scriptEl, firstScript)
  }

  /**
   * Try a list of script URLs in order until one of them loads.
   *
   * Each URL is handed to `async`; when an attempt fails, `reject` is called
   * and the next URL (if any) is tried. `success` is passed through to
   * `async` as the load callback.
   *
   * The caller's array is left untouched: the walk uses an index over a copy
   * instead of shifting entries off the original. With an empty (or missing)
   * list no script request is made and `reject` is called once.
   *
   * @param {string[]} arr      candidate URLs, in priority order
   * @param {Function} success  load callback forwarded to `async`
   * @param {Function} reject   called with no arguments after every failed attempt
   */
  var asyncLoadWithFallBack = function (arr, success, reject) {
    var urls = Array.prototype.slice.call(arr || [])
    var next = 0

    function notifyFailure() {
      if (typeof reject === 'function') {
        reject()
      }
    }

    function onFail() {
      notifyFailure()
      next += 1
      if (next < urls.length) {
        async(urls[next], success, onFail)
      }
    }

    if (!urls.length) {
      // nothing to request: report it the same way as a failed attempt
      notifyFailure()
      return
    }

    async(urls[next], success, onFail)
  }

  // Load the webfont loader script, trying the URLs below in order: two CDN
  // copies first, then the copy under /lib/. asyncCb is the load callback;
  // asyncErr is the failure callback.
  // NOTE(review): the CDN scripts are requested without an integrity check — confirm both hosts are still trusted.
  asyncLoadWithFallBack(
    [
      'https://cdn.jsdelivr.net/npm/webfontloader@1.6.28/webfontloader.min.js',
      'https://cdn.bootcss.com/webfont/1.6.28/webfontloader.js',
      "/lib/webfontloader.min.js",
    ],
    asyncCb,
    asyncErr
  )
</script>

            <!-- loading indicator; the alt text gives it an accessible name -->
            <img class="loading" src="/assets/loading.svg" alt="加载中" style="display: block; margin: 6rem auto 0 auto; width: 6rem; height: 6rem;">
            <div class="container container-unloaded">
                <main class="main post-page">
    <article class="article-entry">
        <p>这个是自己学着玩的（bushi</p>
<span id="more"></span>

<h1 id="强化学习概念介绍"><a href="#强化学习概念介绍" class="headerlink" title="强化学习概念介绍"></a>强化学习概念介绍</h1><p>简单理解来说是一种通过试错来确定完成某项任务的最佳步骤的方法，其学习系统没有办法像一些机器学习一样被告知应该做出什么样的行为，需要通过不断尝试来发现如何实现奖赏值的最大化；当前行为不仅影响即时奖励，同样会影响下一步包括最终可能获得的奖励<br>Concept：找到一个最佳策略 <code>policy</code> ,可以让本体 <code>agent</code> 在特定环境 <code>environment</code> 中，根据当前状态 <code>state</code>,做出行为 <code>action</code>, 从而获得最大回报 <code>Return</code></p>
<h1 id="机器狗强化学习实例-RoomDog-py"><a href="#机器狗强化学习实例-RoomDog-py" class="headerlink" title="机器狗强化学习实例(RoomDog.py)"></a>机器狗强化学习实例(<code>RoomDog.py</code>)</h1><p>学习链接：<a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/36669905">一个简单的强化学习例子来理解Q-learning</a>  <a target="_blank" rel="noopener" href="https://blog.csdn.net/qq_43655453/article/details/107296374">强化学习Q-Learning算法学习笔记</a></p>
<ul>
<li>应用算法: Q-Learning算法<br>术语：<code>state</code>(状态)，<code>action</code>(行为)</li>
<li>丢！终于看懂怎么转化矩阵了，如下：<ol>
<li>首先，简单思路是有几个房间，矩阵就有几行几列</li>
<li>可以将行列分别理解成 <code>state</code> 和 <code>action</code>，即例如从房间4去房间2就可以用矩阵元素[4, 2]的状态来表示</li>
<li>在路径连通与否的表示上，如果从A能到B，则[A,B]就表示为0，反之为-1；特别的，如果B是最终要到达的目的地，此时AB连通就应表示为一个较大的奖赏值R<br>像题目中的房间联通情况 <img src="/2023/11/08/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/Pasted%20image%2020231008121208.png">可以表示为<mjx-container class="MathJax" jax="SVG"><svg style="vertical-align: -8.484ex;" xmlns="http://www.w3.org/2000/svg" width="36.832ex" height="18.1ex" role="img" focusable="false" viewbox="0 -4250 16279.6 8000" xmlns:xlink="http://www.w3.org/1999/xlink"><defs><path id="MJX-1-TEX-D-211D" d="M17 665Q17 672 28 683H221Q415 681 439 677Q461 673 481 667T516 654T544 639T566 623T584 607T597 592T607 578T614 565T618 554L621 548Q626 530 626 497Q626 447 613 419Q578 348 473 326L455 321Q462 310 473 292T517 226T578 141T637 72T686 35Q705 30 705 16Q705 7 693 -1H510Q503 6 404 159L306 310H268V183Q270 67 271 59Q274 42 291 38Q295 37 319 35Q344 35 353 28Q362 17 353 3L346 -1H28Q16 5 16 16Q16 35 55 35Q96 38 101 52Q106 60 106 341T101 632Q95 645 55 648Q17 648 17 665ZM241 35Q238 42 237 45T235 78T233 163T233 337V621L237 635L244 648H133Q136 641 137 638T139 603T141 517T141 341Q141 131 140 89T134 37Q133 36 133 35H241ZM457 496Q457 540 449 570T425 615T400 634T377 643Q374 643 339 648Q300 648 281 635Q271 628 270 610T268 481V346H284Q327 346 375 352Q421 364 439 392T457 496ZM492 537T492 496T488 427T478 389T469 371T464 361Q464 360 465 360Q469 360 497 370Q593 400 593 495Q593 592 477 630L457 637L461 626Q474 611 488 561Q492 537 492 496ZM464 243Q411 317 410 317Q404 317 401 315Q384 315 370 312H346L526 35H619L606 50Q553 109 464 243Z"/><path id="MJX-1-TEX-N-3D" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"/><path id="MJX-1-TEX-S4-23A1" d="M319 -645V1154H666V1070H403V-645H319Z"/><path id="MJX-1-TEX-S4-23A3" d="M319 -644V1155H403V-560H666V-644H319Z"/><path id="MJX-1-TEX-S4-23A2" d="M319 0V602H403V0H319Z"/><path id="MJX-1-TEX-N-2212" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 
237 84 250Z"/><path id="MJX-1-TEX-N-31" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"/><path id="MJX-1-TEX-N-30" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"/><path id="MJX-1-TEX-S4-23A4" d="M0 1070V1154H347V-645H263V1070H0Z"/><path id="MJX-1-TEX-S4-23A6" d="M263 -560V1155H347V-644H0V-560H263Z"/><path id="MJX-1-TEX-S4-23A5" d="M263 0V602H347V0H263Z"/></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="scale(1,-1)"><g data-mml-node="math"><g data-mml-node="TeXAtom" data-mjx-texclass="ORD"><g data-mml-node="mi"><use data-c="211D" xlink:href="#MJX-1-TEX-D-211D"/></g></g><g data-mml-node="mo" transform="translate(999.8,0)"><use data-c="3D" xlink:href="#MJX-1-TEX-N-3D"/></g><g data-mml-node="mrow" transform="translate(2055.6,0)"><g data-mml-node="mo"><use data-c="23A1" xlink:href="#MJX-1-TEX-S4-23A1" transform="translate(0,3096)"/><use data-c="23A3" xlink:href="#MJX-1-TEX-S4-23A3" transform="translate(0,-3106)"/><svg width="667" height="4602" y="-2051" x="0" viewbox="0 1150.5 667 4602"><use data-c="23A2" xlink:href="#MJX-1-TEX-S4-23A2" transform="scale(1,11.467)"/></svg></g><g data-mml-node="mtable" transform="translate(667,0)"><g data-mml-node="mtr" transform="translate(0,3500)"><g data-mml-node="mtd"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(2278,0)"><g 
data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(4556,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(6834,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(9501,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(11501,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g></g><g data-mml-node="mtr" transform="translate(0,2100)"><g data-mml-node="mtd"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(2278,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(4556,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(7223,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(9112,0)"><g data-mml-node="mo"><use data-c="2212" 
xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(11390,0)"><g data-mml-node="mn"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/><use data-c="30" xlink:href="#MJX-1-TEX-N-30" transform="translate(500,0)"/><use data-c="30" xlink:href="#MJX-1-TEX-N-30" transform="translate(1000,0)"/></g></g></g><g data-mml-node="mtr" transform="translate(0,700)"><g data-mml-node="mtd"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(2278,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(4556,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(7223,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(9112,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(11501,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g></g><g data-mml-node="mtr" transform="translate(0,-700)"><g data-mml-node="mtd"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" 
xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(2667,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(4945,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(6834,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(9501,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(11501,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g></g><g data-mml-node="mtr" transform="translate(0,-2100)"><g data-mml-node="mtd"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(2667,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(4556,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(7223,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(9112,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(11390,0)"><g data-mml-node="mn"><use data-c="31" 
xlink:href="#MJX-1-TEX-N-31"/><use data-c="30" xlink:href="#MJX-1-TEX-N-30" transform="translate(500,0)"/><use data-c="30" xlink:href="#MJX-1-TEX-N-30" transform="translate(1000,0)"/></g></g></g><g data-mml-node="mtr" transform="translate(0,-3500)"><g data-mml-node="mtd" transform="translate(389,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(2278,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(4556,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(6834,0)"><g data-mml-node="mo"><use data-c="2212" xlink:href="#MJX-1-TEX-N-2212"/></g><g data-mml-node="mn" transform="translate(778,0)"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/></g></g><g data-mml-node="mtd" transform="translate(9501,0)"><g data-mml-node="mn"><use data-c="30" xlink:href="#MJX-1-TEX-N-30"/></g></g><g data-mml-node="mtd" transform="translate(11390,0)"><g data-mml-node="mn"><use data-c="31" xlink:href="#MJX-1-TEX-N-31"/><use data-c="30" xlink:href="#MJX-1-TEX-N-30" transform="translate(500,0)"/><use data-c="30" xlink:href="#MJX-1-TEX-N-30" transform="translate(1000,0)"/></g></g></g></g><g data-mml-node="mo" transform="translate(13557,0)"><use data-c="23A4" xlink:href="#MJX-1-TEX-S4-23A4" transform="translate(0,3096)"/><use data-c="23A6" xlink:href="#MJX-1-TEX-S4-23A6" transform="translate(0,-3106)"/><svg width="667" height="4602" y="-2051" x="0" viewbox="0 1150.5 667 4602"><use data-c="23A5" xlink:href="#MJX-1-TEX-S4-23A5" transform="scale(1,11.467)"/></svg></g></g></g></g></svg></mjx-container></li>
</ol>
</li>
<li>贝尔曼方程：<mjx-container class="MathJax" jax="SVG"><svg style="vertical-align: -0.566ex;" xmlns="http://www.w3.org/2000/svg" width="35.424ex" height="2.312ex" role="img" focusable="false" viewbox="0 -772 15657.4 1022" xmlns:xlink="http://www.w3.org/1999/xlink"><defs><path id="MJX-1-TEX-I-1D444" d="M399 -80Q399 -47 400 -30T402 -11V-7L387 -11Q341 -22 303 -22Q208 -22 138 35T51 201Q50 209 50 244Q50 346 98 438T227 601Q351 704 476 704Q514 704 524 703Q621 689 680 617T740 435Q740 255 592 107Q529 47 461 16L444 8V3Q444 2 449 -24T470 -66T516 -82Q551 -82 583 -60T625 -3Q631 11 638 11Q647 11 649 2Q649 -6 639 -34T611 -100T557 -165T481 -194Q399 -194 399 -87V-80ZM636 468Q636 523 621 564T580 625T530 655T477 665Q429 665 379 640Q277 591 215 464T153 216Q153 110 207 59Q231 38 236 38V46Q236 86 269 120T347 155Q372 155 390 144T417 114T429 82T435 55L448 64Q512 108 557 185T619 334T636 468ZM314 18Q362 18 404 39L403 49Q399 104 366 115Q354 117 347 117Q344 117 341 117T337 118Q317 118 296 98T274 52Q274 18 314 18Z"/><path id="MJX-1-TEX-N-28" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"/><path id="MJX-1-TEX-I-1D460" d="M131 289Q131 321 147 354T203 415T300 442Q362 442 390 415T419 355Q419 323 402 308T364 292Q351 292 340 300T328 326Q328 342 337 354T354 372T367 378Q368 378 368 379Q368 382 361 388T336 399T297 405Q249 405 227 379T204 326Q204 301 223 291T278 274T330 259Q396 230 396 163Q396 135 385 107T352 51T289 7T195 -10Q118 -10 86 19T53 87Q53 126 74 143T118 160Q133 160 146 151T160 120Q160 94 142 76T111 58Q109 57 108 57T107 55Q108 52 115 47T146 34T201 27Q237 27 263 38T301 66T318 97T323 122Q323 150 302 164T254 181T195 196T148 231Q131 256 131 289Z"/><path id="MJX-1-TEX-N-2C" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 
-168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"/><path id="MJX-1-TEX-I-1D44E" d="M33 157Q33 258 109 349T280 441Q331 441 370 392Q386 422 416 422Q429 422 439 414T449 394Q449 381 412 234T374 68Q374 43 381 35T402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487Q506 153 506 144Q506 138 501 117T481 63T449 13Q436 0 417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157ZM351 328Q351 334 346 350T323 385T277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q217 26 254 59T298 110Q300 114 325 217T351 328Z"/><path id="MJX-1-TEX-N-29" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"/><path id="MJX-1-TEX-N-3D" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"/><path id="MJX-1-TEX-I-1D445" d="M230 637Q203 637 198 638T193 649Q193 676 204 682Q206 683 378 683Q550 682 564 680Q620 672 658 652T712 606T733 563T739 529Q739 484 710 445T643 385T576 351T538 338L545 333Q612 295 612 223Q612 212 607 162T602 80V71Q602 53 603 43T614 25T640 16Q668 16 686 38T712 85Q717 99 720 102T735 105Q755 105 755 93Q755 75 731 36Q693 -21 641 -21H632Q571 -21 531 4T487 82Q487 109 502 166T517 239Q517 290 474 313Q459 320 449 321T378 323H309L277 193Q244 61 244 59Q244 55 245 54T252 50T269 48T302 46H333Q339 38 339 37T336 19Q332 6 326 0H311Q275 2 180 2Q146 2 117 2T71 2T50 1Q33 1 33 10Q33 12 36 24Q41 43 46 45Q50 46 61 46H67Q94 46 127 49Q141 52 146 61Q149 65 218 339T287 628Q287 635 230 637ZM630 554Q630 586 609 608T523 636Q521 636 500 636T462 637H440Q393 637 386 627Q385 624 352 494T319 361Q319 360 388 360Q466 361 492 367Q556 377 592 426Q608 449 619 
486T630 554Z"/><path id="MJX-1-TEX-N-2B" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"/><path id="MJX-1-TEX-I-1D6FE" d="M31 249Q11 249 11 258Q11 275 26 304T66 365T129 418T206 441Q233 441 239 440Q287 429 318 386T371 255Q385 195 385 170Q385 166 386 166L398 193Q418 244 443 300T486 391T508 430Q510 431 524 431H537Q543 425 543 422Q543 418 522 378T463 251T391 71Q385 55 378 6T357 -100Q341 -165 330 -190T303 -216Q286 -216 286 -188Q286 -138 340 32L346 51L347 69Q348 79 348 100Q348 257 291 317Q251 355 196 355Q148 355 108 329T51 260Q49 251 47 251Q45 249 31 249Z"/><path id="MJX-1-TEX-N-2219" d="M55 251Q55 328 112 386T249 444T386 388T444 249Q444 171 388 113T250 55Q170 55 113 112T55 251Z"/><path id="MJX-1-TEX-N-6D" d="M41 46H55Q94 46 102 60V68Q102 77 102 91T102 122T103 161T103 203Q103 234 103 269T102 328V351Q99 370 88 376T43 385H25V408Q25 431 27 431L37 432Q47 433 65 434T102 436Q119 437 138 438T167 441T178 442H181V402Q181 364 182 364T187 369T199 384T218 402T247 421T285 437Q305 442 336 442Q351 442 364 440T387 434T406 426T421 417T432 406T441 395T448 384T452 374T455 366L457 361L460 365Q463 369 466 373T475 384T488 397T503 410T523 422T546 432T572 439T603 442Q729 442 740 329Q741 322 741 190V104Q741 66 743 59T754 49Q775 46 803 46H819V0H811L788 1Q764 2 737 2T699 3Q596 3 587 0H579V46H595Q656 46 656 62Q657 64 657 200Q656 335 655 343Q649 371 635 385T611 402T585 404Q540 404 506 370Q479 343 472 315T464 232V168V108Q464 78 465 68T468 55T477 49Q498 46 526 46H542V0H534L510 1Q487 2 460 2T422 3Q319 3 310 0H302V46H318Q379 46 379 62Q380 64 380 200Q379 335 378 343Q372 371 358 385T334 402T308 404Q263 404 229 370Q202 343 195 315T187 232V168V108Q187 78 188 68T191 55T200 49Q221 46 249 46H265V0H257L234 1Q210 2 183 2T145 3Q42 3 33 0H25V46H41Z"/><path id="MJX-1-TEX-N-61" d="M137 305T115 305T78 320T63 359Q63 394 97 421T218 448Q291 448 336 416T396 340Q401 326 401 309T402 194V124Q402 76 
407 58T428 40Q443 40 448 56T453 109V145H493V106Q492 66 490 59Q481 29 455 12T400 -6T353 12T329 54V58L327 55Q325 52 322 49T314 40T302 29T287 17T269 6T247 -2T221 -8T190 -11Q130 -11 82 20T34 107Q34 128 41 147T68 188T116 225T194 253T304 268H318V290Q318 324 312 340Q290 411 215 411Q197 411 181 410T156 406T148 403Q170 388 170 359Q170 334 154 320ZM126 106Q126 75 150 51T209 26Q247 26 276 49T315 109Q317 116 318 175Q318 233 317 233Q309 233 296 232T251 223T193 203T147 166T126 106Z"/><path id="MJX-1-TEX-N-78" d="M201 0Q189 3 102 3Q26 3 17 0H11V46H25Q48 47 67 52T96 61T121 78T139 96T160 122T180 150L226 210L168 288Q159 301 149 315T133 336T122 351T113 363T107 370T100 376T94 379T88 381T80 383Q74 383 44 385H16V431H23Q59 429 126 429Q219 429 229 431H237V385Q201 381 201 369Q201 367 211 353T239 315T268 274L272 270L297 304Q329 345 329 358Q329 364 327 369T322 376T317 380T310 384L307 385H302V431H309Q324 428 408 428Q487 428 493 431H499V385H492Q443 385 411 368Q394 360 377 341T312 257L296 236L358 151Q424 61 429 57T446 50Q464 46 499 46H516V0H510H502Q494 1 482 1T457 2T432 2T414 3Q403 3 377 3T327 1L304 0H295V46H298Q309 46 320 51T331 63Q331 65 291 120L250 175Q249 174 219 133T185 88Q181 83 181 74Q181 63 188 55T206 46Q208 46 208 23V0H201Z"/><path id="MJX-1-TEX-N-7B" d="M434 -231Q434 -244 428 -250H410Q281 -250 230 -184Q225 -177 222 -172T217 -161T213 -148T211 -133T210 -111T209 -84T209 -47T209 0Q209 21 209 53Q208 142 204 153Q203 154 203 155Q189 191 153 211T82 231Q71 231 68 234T65 250T68 266T82 269Q116 269 152 289T203 345Q208 356 208 377T209 529V579Q209 634 215 656T244 698Q270 724 324 740Q361 748 377 749Q379 749 390 749T408 750H428Q434 744 434 732Q434 719 431 716Q429 713 415 713Q362 710 332 689T296 647Q291 634 291 499V417Q291 370 288 353T271 314Q240 271 184 255L170 250L184 245Q202 239 220 230T262 196T290 137Q291 131 291 1Q291 -134 296 -147Q306 -174 339 -192T415 -213Q429 -213 431 -216Q434 -219 434 -231Z"/><path id="MJX-1-TEX-N-2DC" d="M179 601Q164 601 151 595T131 584T111 565L97 577L83 588Q83 589 95 
603T121 633T142 654Q165 668 187 668T253 650T320 632Q335 632 348 638T368 649T388 668L402 656L416 645Q375 586 344 572Q330 565 313 565Q292 565 248 583T179 601Z"/><path id="MJX-1-TEX-SO-2DC" d="M374 597Q337 597 269 627T160 658Q101 658 34 606L24 597L12 611Q1 624 1 626Q1 627 27 648T55 671Q120 722 182 722Q219 722 286 692T395 661Q454 661 521 713L531 722L543 708Q554 695 554 693Q554 692 528 671T500 648Q434 597 374 597Z"/><path id="MJX-1-TEX-N-A0" d=""/></defs><g stroke="currentColor" fill="currentColor" stroke-width="0" transform="scale(1,-1)"><g data-mml-node="math"><g data-mml-node="mi"><use data-c="1D444" xlink:href="#MJX-1-TEX-I-1D444"/></g><g data-mml-node="mo" transform="translate(791,0)"><use data-c="28" xlink:href="#MJX-1-TEX-N-28"/></g><g data-mml-node="mi" transform="translate(1180,0)"><use data-c="1D460" xlink:href="#MJX-1-TEX-I-1D460"/></g><g data-mml-node="mo" transform="translate(1649,0)"><use data-c="2C" xlink:href="#MJX-1-TEX-N-2C"/></g><g data-mml-node="mi" transform="translate(2093.7,0)"><use data-c="1D44E" xlink:href="#MJX-1-TEX-I-1D44E"/></g><g data-mml-node="mo" transform="translate(2622.7,0)"><use data-c="29" xlink:href="#MJX-1-TEX-N-29"/></g><g data-mml-node="mo" transform="translate(3289.4,0)"><use data-c="3D" xlink:href="#MJX-1-TEX-N-3D"/></g><g data-mml-node="mi" transform="translate(4345.2,0)"><use data-c="1D445" xlink:href="#MJX-1-TEX-I-1D445"/></g><g data-mml-node="mo" transform="translate(5104.2,0)"><use data-c="28" xlink:href="#MJX-1-TEX-N-28"/></g><g data-mml-node="mi" transform="translate(5493.2,0)"><use data-c="1D460" xlink:href="#MJX-1-TEX-I-1D460"/></g><g data-mml-node="mo" transform="translate(5962.2,0)"><use data-c="2C" xlink:href="#MJX-1-TEX-N-2C"/></g><g data-mml-node="mi" transform="translate(6406.9,0)"><use data-c="1D44E" xlink:href="#MJX-1-TEX-I-1D44E"/></g><g data-mml-node="mo" transform="translate(6935.9,0)"><use data-c="29" xlink:href="#MJX-1-TEX-N-29"/></g><g data-mml-node="mo" transform="translate(7547.1,0)"><use data-c="2B" 
xlink:href="#MJX-1-TEX-N-2B"/></g><g data-mml-node="mi" transform="translate(8547.3,0)"><use data-c="1D6FE" xlink:href="#MJX-1-TEX-I-1D6FE"/></g><g data-mml-node="mo" transform="translate(9312.6,0)"><use data-c="2219" xlink:href="#MJX-1-TEX-N-2219"/></g><g data-mml-node="mo" transform="translate(10034.8,0)"><use data-c="6D" xlink:href="#MJX-1-TEX-N-6D"/><use data-c="61" xlink:href="#MJX-1-TEX-N-61" transform="translate(833,0)"/><use data-c="78" xlink:href="#MJX-1-TEX-N-78" transform="translate(1333,0)"/></g><g data-mml-node="mo" transform="translate(11895.8,0)"><use data-c="7B" xlink:href="#MJX-1-TEX-N-7B"/></g><g data-mml-node="mi" transform="translate(12395.8,0)"><use data-c="1D444" xlink:href="#MJX-1-TEX-I-1D444"/></g><g data-mml-node="mo" transform="translate(13186.8,0)"><use data-c="28" xlink:href="#MJX-1-TEX-N-28"/></g><g data-mml-node="TeXAtom" data-mjx-texclass="ORD" transform="translate(13575.8,0)"><g data-mml-node="mover"><g data-mml-node="mi"><use data-c="1D460" xlink:href="#MJX-1-TEX-I-1D460"/></g><g data-mml-node="mo" transform="translate(290.1,-18) translate(-250 0)"><use data-c="2DC" xlink:href="#MJX-1-TEX-N-2DC"/></g></g></g><g data-mml-node="mo" transform="translate(14044.8,0)"><use data-c="2C" xlink:href="#MJX-1-TEX-N-2C"/></g><g data-mml-node="TeXAtom" data-mjx-texclass="ORD" transform="translate(14489.4,0)"><g data-mml-node="mover"><g data-mml-node="mi"><use data-c="1D44E" xlink:href="#MJX-1-TEX-I-1D44E"/></g><g data-mml-node="mo" transform="translate(264.5,-50) translate(-278 0)"><use data-c="2DC" xlink:href="#MJX-1-TEX-SO-2DC"/></g></g></g><g data-mml-node="mo" transform="translate(15018.4,0)"><use data-c="29" xlink:href="#MJX-1-TEX-N-29"/></g><g data-mml-node="mtext" transform="translate(15407.4,0)"><use data-c="A0" xlink:href="#MJX-1-TEX-N-A0"/></g></g></g></svg></mjx-container>}<br>其中$Q(s, a)$表示当前的状态和行为，$Q(\widetilde{s},\widetilde{a})$表示下一过程的状态和行为,$\gamma$表示位于0 1之间的折扣系数，表示模型的远见程度，$\gamma$越小表示当下的Reward奖励函数越比未来的重要</li>
<li><big>Q-Learning的算法步骤</big></li>
</ul>
<ol>
<li>设置γ参数和R矩阵的环境奖励</li>
<li>初始化矩阵Q为0</li>
<li>对于每一个状态<br>(1) 随机选择一个状态<br>(2) do (while unreached the goal)<br>a. 在当前状态所有可能的行动中选择一个<br>b. 使用这个可能行动然后分析到达下一个状态<br>c. 基于下一个状态下所有可能的行动获得最大值Q<br>d. 利用贝尔曼方程计算当前状态下的Q</li>
</ol>

    </article>
    <!-- license -->
    
        <div class="license-wrapper">
            <!-- Post attribution and license. Every <p> is closed explicitly, and the
                 dates carry machine-readable values taken from the page's
                 article:published_time / article:modified_time meta tags. -->
            <p>原文作者：<a href="https://binisalegend.github.io">Binisalegend</a></p>
            <p>原文链接：<a href="https://binisalegend.github.io/2023/11/08/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/">https://binisalegend.github.io/2023/11/08/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/</a></p>
            <p>发表日期：<a href="https://binisalegend.github.io/2023/11/08/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/"><time datetime="2023-11-08T05:18:58.000Z">November 8th 2023, 1:18:58 pm</time></a></p>
            <p>更新日期：<a href="https://binisalegend.github.io/2023/11/08/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/"><time datetime="2023-11-08T06:25:31.643Z">November 8th 2023, 2:25:31 pm</time></a></p>
            <!-- License link uses https rather than plain http. -->
            <p>版权声明：本文采用<a rel="license noopener" target="_blank" href="https://creativecommons.org/licenses/by-nc/4.0/">知识共享署名-非商业性使用 4.0 国际许可协议</a>进行许可</p>
        </div>
    
    <!-- paginator -->
    <ul class="post-paginator">
        <!-- Link to the next post -->
        <li class="next">
            <div class="nextSlogan">Next Post</div>
            <a href="/2023/11/08/C++%E4%B8%AD%E7%9A%84STL%E5%AE%B9%E5%99%A8/" title="C++中的STL容器">
                <div class="nextTitle">C++中的STL容器</div>
            </a>
        </li>
        <!-- Link to the previous post -->
        <li class="previous">
            <div class="prevSlogan">Previous Post</div>
            <a href="/2023/11/08/%E5%A4%A7%E4%BA%8C%E4%B8%8A%E5%B0%8F%E5%AD%A6%E6%9C%9F%E7%A8%8B%E8%AE%BE%E8%A7%A3%E9%A2%98%E6%80%9D%E8%B7%AF/" title="大二上小学期程设解题思路">
                <div class="prevTitle">大二上小学期程设解题思路</div>
            </a>
        </li>
    </ul>
    <!-- comment -->
    
        <div class="post-comment">
            <!-- LiveRe (来必力) City edition install snippet -->
            <div id="lv-container" data-id="city" data-uid="MTAyMC81OTE3Mi8zNTYzNA==">
                <script>
                    // Injects the LiveRe embed script in front of the first <script>
                    // element found in the document.
                    (function (d, s) {
                        var j, e = d.getElementsByTagName(s)[0];
                        // Do nothing when LivereTower is already defined as a function.
                        if (typeof LivereTower === 'function') { return; }
                        j = d.createElement(s);
                        j.src = 'https://cdn-city.livere.com/js/embed.dist.js';
                        j.async = true;

                        e.parentNode.insertBefore(j, e);
                    })(document, 'script');
                </script>
                <noscript>为正常使用来必力评论功能请激活 JavaScript</noscript>
            </div>

            <!-- utterances comments -->

            <!-- partial('_partial/comment/changyan') -->
            <!-- PC version -->
        </div>
    
    <!-- timeliness note -->
    <!-- idea from: https://hexo.fluid-dev.com/posts/hexo-injector/#%E6%96%87%E7%AB%A0%E6%97%B6%E6%95%88%E6%80%A7%E6%8F%90%E7%A4%BA -->
    
    <!-- Mathjax -->
    
</main>

                <!-- profile -->
                
            </div>
            <footer class="footer footer-unloaded">
                <!-- social -->
                <div class="social">
                    <!-- Icon-only links: the anchors have no text content, so aria-label
                         supplies the accessible name alongside the existing title. -->
                    <a href="mailto:2446479002@qq.com" class="iconfont-archer email" title="email" aria-label="email"></a>
                    <a href="https://github.com/binisalegend" class="iconfont-archer github" target="_blank" rel="noopener" title="github" aria-label="github"></a>
                    <!-- QR-code images nested inside the wechat / qq icon spans -->
                    <span class="iconfont-archer wechat" title="wechat">
                        <img class="profile-qr" src="/assets/WeChat.jpg" alt="微信二维码">
                    </span>
                    <span class="iconfont-archer qq" title="qq">
                        <img class="profile-qr" src="/assets/QQ.jpg" alt="QQ 二维码">
                    </span>
                </div>
                <!-- powered by Hexo -->
                <div class="copyright">
                    <span id="hexo-power">Powered by <a href="https://hexo.io/" target="_blank" rel="noopener">Hexo</a></span><span class="iconfont-archer power">&#xe635;</span><span id="theme-info">theme <a href="https://github.com/fi3ework/hexo-theme-archer" target="_blank" rel="noopener">Archer</a></span>
                </div>
                <!-- website approve for Chinese user -->
                <!-- busuanzi (不蒜子) site page-view counter; the inner span is empty in
                     the static markup. NOTE(review): presumably filled in by the busuanzi
                     script loaded at the end of the page — confirm. -->
                <div class="busuanzi-container">
                    <span id="busuanzi_container_site_pv">PV: <span id="busuanzi_value_site_pv"></span> :)</span>
                </div>
            </footer>

        </div>
        <!-- toc -->
        
            <!-- Table of contents for the post.
                 NOTE(review): the class name "toc-wrapper-loding" is kept exactly as
                 generated; presumably the theme's CSS/JS matches this spelling —
                 confirm before renaming. -->
            <div class="toc-wrapper toc-wrapper-loding" style="top:50vh;">
                <div class="toc-catalog">
                    <span class="iconfont-archer catalog-icon">&#xe613;</span><span>CATALOG</span>
                </div>
                <ol class="toc"><li class="toc-item toc-level-1"><a class="toc-link" href="#%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E6%A6%82%E5%BF%B5%E4%BB%8B%E7%BB%8D"><span class="toc-number">1.</span> <span class="toc-text">强化学习概念介绍</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#%E6%9C%BA%E5%99%A8%E7%8B%97%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E5%AE%9E%E4%BE%8B-RoomDog-py"><span class="toc-number">2.</span> <span class="toc-text">机器狗强化学习实例(RoomDog.py)</span></a></li></ol>
            </div>
        
        <!-- sidebar -->
        <div class="sidebar sidebar-hide">
    <!-- Sidebar tab strip with three entries (Archive, Tag, Cate); each entry is an
         icon-font glyph followed by its label. -->
    <ul class="sidebar-tabs sidebar-tabs-active-0">
        <li class="sidebar-tab-archives"><span class="iconfont-archer">&#xe67d;</span><span class="tab-name">Archive</span></li>
        <li class="sidebar-tab-tags"><span class="iconfont-archer">&#xe61b;</span><span class="tab-name">Tag</span></li>
        <li class="sidebar-tab-categories"><span class="iconfont-archer">&#xe666;</span><span class="tab-name">Cate</span></li>
    </ul>
    <div class="sidebar-content sidebar-content-show-archive">
        <div class="sidebar-panel-archives">
            <!-- Archive panel: total post count followed by the posts grouped by year.
                 The archive is sorted by date in the EJS template. -->
            <div class="total-and-search">
                <div class="total-archive">
                Total : 8
                </div>
                <!-- search  -->
            </div>

            <div class="post-archive">
                <div class="archive-year"> 2023 </div>
                <ul class="year-list">
                    <li class="archive-post-item">
                        <span class="archive-post-date">11/08</span>
                        <a class="archive-post-title" href="/2023/11/08/%E6%95%B0%E6%8D%AE%E7%BB%93%E6%9E%84%E4%B8%8E%E7%AE%97%E6%B3%95/">数据结构与算法</a>
                    </li>
                    <li class="archive-post-item">
                        <span class="archive-post-date">11/08</span>
                        <a class="archive-post-title" href="/2023/11/08/C++%E4%B8%AD%E7%9A%84STL%E5%AE%B9%E5%99%A8/">C++中的STL容器</a>
                    </li>
                    <li class="archive-post-item">
                        <span class="archive-post-date">11/08</span>
                        <a class="archive-post-title" href="/2023/11/08/Python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/">Python学习笔记</a>
                    </li>
                    <li class="archive-post-item">
                        <span class="archive-post-date">11/08</span>
                        <a class="archive-post-title" href="/2023/11/08/%E5%A4%A7%E4%BA%8C%E4%B8%8A%E5%B0%8F%E5%AD%A6%E6%9C%9F%E7%A8%8B%E8%AE%BE%E8%A7%A3%E9%A2%98%E6%80%9D%E8%B7%AF/">大二上小学期程设解题思路</a>
                    </li>
                    <li class="archive-post-item">
                        <span class="archive-post-date">11/08</span>
                        <a class="archive-post-title" href="/2023/11/08/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/">强化学习笔记</a>
                    </li>
                    <li class="archive-post-item">
                        <span class="archive-post-date">11/08</span>
                        <a class="archive-post-title" href="/2023/11/08/CS61A%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/">CS61A学习笔记</a>
                    </li>
                    <li class="archive-post-item">
                        <span class="archive-post-date">11/08</span>
                        <a class="archive-post-title" href="/2023/11/08/Missing-Semester%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/">Missing-Semester学习笔记</a>
                    </li>
                    <li class="archive-post-item">
                        <span class="archive-post-date">11/07</span>
                        <a class="archive-post-title" href="/2023/11/07/Java%E7%A8%8B%E5%BA%8F%E8%AE%BE%E8%AE%A1%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0/">Java程序设计学习笔记</a>
                    </li>
                </ul>
            </div>
        </div>

        <div class="sidebar-panel-tags">
    <!-- Tag names: one span per tag, with the tag name repeated in data-tags. -->
    <div class="sidebar-tags-name">
        
            <span class="sidebar-tag-name" data-tags="Python">
                <span class="iconfont-archer">&#xe606;</span>
                Python
            </span>
        
            <span class="sidebar-tag-name" data-tags="Java语言学习">
                <span class="iconfont-archer">&#xe606;</span>
                Java语言学习
            </span>
        
            <span class="sidebar-tag-name" data-tags="杂技学习">
                <span class="iconfont-archer">&#xe606;</span>
                杂技学习
            </span>
        
            <span class="sidebar-tag-name" data-tags="数据结构">
                <span class="iconfont-archer">&#xe606;</span>
                数据结构
            </span>
        
            <span class="sidebar-tag-name" data-tags="强化学习">
                <span class="iconfont-archer">&#xe606;</span>
                强化学习
            </span>
        
    </div>
    <div class="iconfont-archer sidebar-tags-empty">&#xe678;</div>
    <!-- Missing-module notice; hidden by the inline display:none. -->
    <div class="tag-load-fail" style="display: none; color: #ccc; font-size: 0.6rem;">
        缺失模块，请参考主题文档进行安装配置：https://github.com/fi3ework/hexo-theme-archer#%E5%AE%89%E8%A3%85%E4%B8%BB%E9%A2%98
    </div> 
    <!-- Empty in the static markup. NOTE(review): presumably populated by the theme script — confirm. -->
    <div class="sidebar-tags-list"></div>
</div>

        <div class="sidebar-panel-categories">
    <!-- Category names container; no category entries are present in this page's markup. -->
    <div class="sidebar-categories-name">
    
    </div>
    <div class="iconfont-archer sidebar-categories-empty">&#xe678;</div>
    <!-- Empty in the static markup. NOTE(review): presumably populated by the theme script — confirm. -->
    <div class="sidebar-categories-list"></div>
</div>

    </div>
</div>

        <!-- site-meta -->
        <script>
    // Site metadata declared with `var` at the top level of a classic script.
    var siteMetaRoot = "/"
    // Fall back to '/' when the value is the literal string "undefined".
    if (siteMetaRoot === "undefined") {
        siteMetaRoot = '/'
    }
    // NOTE(review): presumably read by the theme scripts loaded later on this page
    // (e.g. /scripts/main.js) — confirm before renaming or removing.
    var siteMeta = {
        url: "https://binisalegend.github.io",
        root: siteMetaRoot,
        author: "Binisalegend"
    }
</script>

        <!-- import experimental options here -->
        <!-- Custom Font -->


        <!-- main func -->
        <script src="/scripts/main.js?v=20211217"></script>
        <!-- dark mode -->
        <script src="/scripts/dark.js?v=20211217"></script>
        <!-- fancybox -->
        <script src="https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@3.5.7/dist/jquery.fancybox.min.js" defer></script>
        <!-- algolia -->

        <!-- busuanzi: explicit https instead of a protocol-relative URL -->
        <script src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js" async></script>

        <!-- CNZZ -->

        <!-- async load share.js -->
        <script src="/scripts/share.js?v=20211217" async></script>
        
        <!-- mermaid -->
        
    </body>
</html>
